#!/usr/bin/env python

import urllib
import sys
import time
import json

# Default base URL of the REST API; replaced by the optional 3rd argument.
APIBASE = 'http://localhost/onki-light/rest/v1/'

# 1st arg (required): vocabulary id
if len(sys.argv) < 2:
  # Exit with a usage message (on stderr, status 1) rather than an
  # IndexError traceback when the vocabulary id is missing.
  sys.exit("usage: %s VOCAB [SAMPLE_INTERVAL [API_BASE_URL]]" % sys.argv[0])
vocab = sys.argv[1]

# 2nd arg: sampling interval (check every N search terms)
if len(sys.argv) > 2:
  try:
    sample = int(sys.argv[2])
  except ValueError:
    sys.exit("sampling interval must be an integer, got %r" % sys.argv[2])
  if sample == 0:
    # The interval is used as a modulus when sampling input lines, so zero
    # would only fail later with a ZeroDivisionError.
    sys.exit("sampling interval must not be zero")
else:
  sample = 1

# 3rd arg: base URL for REST API
if len(sys.argv) > 3:
  APIBASE = sys.argv[3]
  # The request path is appended directly to the base URL, so make sure the
  # two are separated by a slash.
  if not APIBASE.endswith('/'):
    APIBASE = APIBASE + '/'


def get_count(prefix, lang):
  """Run one search query against the REST API and time it.

  The URL is built from the module-level APIBASE and vocab together with
  the given arguments; the JSON response is parsed and the entries under
  its 'results' key are counted.

  prefix -- search term; percent-encoded before being put into the URL
  lang   -- language value, inserted into the URL as given

  Returns a tuple (count, elapsed, url), where elapsed is the wall-clock
  time in whole milliseconds spent fetching and parsing the response.

  If the request or the parsing fails, a diagnostic line is written to
  stderr and the process exits with status 1.
  """
  url = APIBASE + "search?vocab=%s&lang=%s&query=%s" % (vocab, lang, urllib.quote(prefix))
  starttime = time.time()
  try:
    response = urllib.urlopen(url)
    try:
      count = len(json.load(response)['results'])
    finally:
      # Release the connection even when the body is not valid JSON.
      response.close()
  except Exception as err:
    # Only real errors are handled here, so Ctrl-C still interrupts the run.
    # The diagnostic goes to stderr to keep it out of the tab-separated
    # results that the caller writes to stdout.
    sys.stderr.write("%s %s %s %r\n" % (prefix, lang, url, err))
    sys.exit(1)
  endtime = time.time()

  return (count, int((endtime - starttime) * 1000), url)

# Read "prefix<TAB>lang" pairs from stdin, query every sample-th pair and
# write one tab-separated result line (prefix, lang, count, elapsed ms, url)
# per query to stdout.
lineno = 0
for line in sys.stdin:
  fields = line.split("\t")
  if len(fields) != 2:
    # Skip lines that do not hold exactly two tab-separated fields instead of
    # failing on tuple unpacking; blank lines are dropped silently.
    if line.strip():
      sys.stderr.write("skipping malformed input line: %r\n" % line)
    continue
  prefix = fields[0].strip()
  lang = fields[1].strip()

  # Only well-formed lines are counted towards the sampling interval.
  lineno += 1
  if lineno % sample != 0:
    continue

  count, elapsed, url = get_count(prefix, lang)
  sys.stdout.write("\t".join((prefix, lang, str(count), str(elapsed), url)) + "\n")

